library(dplyr)
library(tidyr)
library(ordinal)

# Read the raw data.
hyr_data <- read.csv("raw data.csv")

# Keep only participants with a response for every critical item: a row is
# dropped as soon as any one of the six rating columns is empty.
hyr_data <- filter(
  hyr_data,
  hyr_baseline != "",
  cr_baseline != "",
  hyr_island != "",
  cr_island != "",
  hyr_a_minimality != "",
  Q588 != ""
)

# Drop the first two remaining rows, which contain superfluous information.
hyr_data <- hyr_data[-c(1, 2), ]

# Give every remaining row a sequential participant id (1, 2, 3, ...).
# seq_len(nrow()) is used instead of dplyr::row_number(hyr_data): when handed
# a whole data frame, row_number() ranks the rows by their contents, so the
# ids do not follow row order and rows containing a missing value get NA.
hyr_data$participant <- seq_len(nrow(hyr_data))

# Keep columns 12-20 and column 34 only; this removes the fillers and other
# unnecessary columns.
# NOTE(review): column 34 is presumably the participant id added above --
# confirm against the column count of the raw file.
hyr_data <- hyr_data[, c(12:20, 34)]

# Serbian rating labels and the numeric value each one is replaced with.
rating_codes <- c(
  "skoro neprihvatljivo" = 2,
  "neprihvatljivo" = 1,
  "neutralno" = 3,
  "skoro prihvatljivo" = 4,
  "prihvatljivo" = 5
)

# One pass per label over every column: cells equal to the label are replaced
# by its numeric value, all other cells are left as they are.
for (label in names(rating_codes)) {
  code <- rating_codes[[label]]
  hyr_data <- mutate_all(hyr_data, ~ ifelse(. == label, code, .))
}

# Reshape to long format and derive the analysis variables.
hyr_data <- hyr_data %>%
  # Stack the six rating columns: each row now holds a single rating, with the
  # name of the column it came from stored in "question".
  gather(
    key = "question", value = "rating",
    hyr_baseline, cr_baseline, hyr_island, cr_island, hyr_a_minimality, Q588
  ) %>%
  # Derive "construction" and "condition" from the question name. Names
  # without "hyr" count as copy raising; names with neither "baseline" nor
  # "island" count as the "object" condition.
  mutate(
    construction = if_else(
      grepl("hyr", question), "hyperraising", "copy_raising"
    ),
    condition = case_when(
      grepl("baseline", question) ~ "baseline",
      grepl("island", question) ~ "island",
      TRUE ~ "object"
    )
  ) %>%
  # "question" is no longer needed. It is dropped by name rather than by
  # position so that a change to the selected columns cannot silently remove
  # a different column.
  select(-question) %>%
  # Ratings were stored as text during recoding; convert them to numbers.
  mutate(rating = as.numeric(rating))

# Classifies one participant from that participant's ratings: "raiser" when
# every baseline item (hyperraising and copy raising) is rated 4 or higher,
# "nonraiser" otherwise.
classify_raiser <- function(rating, construction, condition) {
  is_baseline_item <- construction %in% c("hyperraising", "copy_raising") &
    condition == "baseline"
  ifelse(all(rating[is_baseline_item] >= 4), "raiser", "nonraiser")
}

# Attach the raiser status to every row of the participant it belongs to.
hyr_data <- hyr_data %>%
  group_by(participant) %>%
  mutate(raiser_status = classify_raiser(rating, construction, condition)) %>%
  ungroup()

# Exclude non-raisers, i.e. speakers who reject either hyperraising or copy
# raising in the baseline condition.
hyr_data_raisers <- filter(hyr_data, raiser_status != "nonraiser")

# Ordinal regression on the raisers only: cumulative link mixed model with
# condition, construction and their interaction as fixed effects and a random
# intercept per participant.
model_raisers <- clmm(
  formula = as.factor(rating) ~ condition * construction + (1 | participant),
  data = hyr_data_raisers
)
summary(model_raisers)

# Save the cleaned long-format data (hyr_data, which still includes the
# non-raisers) to disk.
write.csv(
  x = hyr_data,
  file = "hyperraising data clean.csv",
  fileEncoding = "UTF-8"
)